Merged [18171]: An exception like [http://www.visualdistortion.org/crash/view.jsp...
[adiumx.git] / Other / Adium Spotlight Importer / GetMetadataForHTMLLog-Additions.m
blobef6c353ed5db586ed319b133c533d9988aed3e06
//
//  GetMetadataForHTMLLog-Additions.m
//  AdiumSpotlightImporter
//
//  Created by Evan Schoenberg on 5/25/06.
//
8 #import "GetMetadataForHTMLLog-Additions.h"
/*!
 * @brief These additions are all from AIUtilities
 *
 * The spotlight importer should include this file to get these specific additions.
 * If the GetMetadataForHTMLLog class is used in a situation in which AIUtilities is linked in already, it is
 * not necessary to include this implementation file.
 */
@implementation NSScanner (AdiumSpotlightImporterAdditions)

/*!
 * @brief Scan an unsigned decimal integer at the current scan location
 *
 * Characters in the receiver's charactersToBeSkipped set are skipped first, then an optional '+' sign,
 * then one or more ASCII digits.
 *
 * @param unsignedIntValue On success, receives the scanned value. May be NULL to scan past the number
 *        without retrieving it. A value too large for an unsigned int is clamped to the largest
 *        representable value rather than wrapping around.
 * @result YES if at least one digit was scanned; the scan location is then just past the last digit.
 *         NO otherwise; the scan location is left where it was.
 */
- (BOOL)scanUnsignedInt:(unsigned int *)unsignedIntValue
{
	const unsigned maxValue = ~0U;

	NSString *string = [self string];
	unsigned  length = [string length];
	unsigned  i      = [self scanLocation];

	//skip characters if necessary. Reading characters one at a time through characterAtIndex: avoids
	//copying the whole remainder of the string into a temporary buffer for every call.
	NSCharacterSet *skipSet = [self charactersToBeSkipped];
	if (skipSet) {
		while ((i < length) && [skipSet characterIsMember:[string characterAtIndex:i]]) {
			++i;
		}
	}

	//an explicit plus sign is permitted; a minus sign is not, since the result is unsigned.
	if ((i < length) && ([string characterAtIndex:i] == '+')) {
		++i;
	}

	unsigned firstDigit = i;
	unsigned total      = 0;
	BOOL     overflowed = NO;

	while (i < length) {
		unichar c = [string characterAtIndex:i];
		if ((c < '0') || (c > '9')) break;

		unsigned digit = c - '0';
		if (overflowed || (total > ((maxValue - digit) / 10))) {
			//keep consuming digits so the scan location ends up after the whole number
			overflowed = YES;
		} else {
			total = (total * 10) + digit;
		}
		++i;
	}

	//no digits: fail without having moved the scan location
	if (i == firstDigit) return NO;

	//i is an index into the scanned string itself, so it can be used as the new location directly
	[self setScanLocation:i];
	if (unsignedIntValue) {
		*unsignedIntValue = (overflowed ? maxValue : total);
	}
	return YES;
}

@end
//From AIUtilities
@implementation NSString (AdiumSpotlightImporterAdditions)

/*!
 * @brief Convert a Unicode code point into UTF-16 code units
 *
 * Code points below 0x10000 fit in a single UTF-16 unit; anything above is encoded as a
 * high/low surrogate pair: subtract 0x10000, put the upper 10 bits of the remainder into the
 * high surrogate (0xD800 base) and the lower 10 bits into the low surrogate (0xDC00 base).
 *
 * @param in The code point to convert. Values above 0x10FFFF are not valid Unicode; the caller should
 *        reject them. If one is passed anyway the result is still a well-formed pair (the excess bits are
 *        masked off), but it does not represent the input.
 * @param outHigh If non-NULL, receives the high surrogate, or 0 when no surrogate pair is needed.
 * @param outLow If non-NULL, receives the low surrogate, or the code point itself when no pair is needed.
 * @result YES if the code point required a surrogate pair, NO if it fit in a single unit.
 */
BOOL AIGetSurrogates(UTF32Char in, UTF16Char *outHigh, UTF16Char *outLow)
{
	enum {
		SupplementaryPlanesBase = 0x10000,
		SurrogatePayloadBits    = 10,
		SurrogatePayloadMask    = 0x3FF,  //0b0000 0011 1111 1111
		HighSurrogateBase       = 0xD800, //0b1101 1000 0000 0000
		LowSurrogateBase        = 0xDC00  //0b1101 1100 0000 0000
	};

	if (in < SupplementaryPlanesBase) {
		if (outHigh) *outHigh = 0;
		if (outLow)  *outLow  = (UTF16Char)in;
		return NO;
	}

	UTF32Char offset = in - SupplementaryPlanesBase;

	if (outHigh) {
		*outHigh = (UTF16Char)(HighSurrogateBase | ((offset >> SurrogatePayloadBits) & SurrogatePayloadMask));
	}
	if (outLow) {
		*outLow = (UTF16Char)(LowSurrogateBase | (offset & SurrogatePayloadMask));
	}

	return YES;
}

/*
 * Decode the body of an HTML character reference (the text between '&' and ';', exclusive).
 *
 * Supports decimal ("#65") and hexadecimal ("#x41" / "#X41") numeric references and the named
 * entities quot, amp, lt, gt and nbsp (matched case-insensitively). Numeric references to 0, to
 * the surrogate range, or beyond 0x10FFFF are rejected because they are not encodable characters.
 *
 * Returns YES and stores the code point in *outCodePoint if the entity was recognized, NO otherwise.
 */
static BOOL AIDecodeHTMLEntity(NSString *entity, UTF32Char *outCodePoint)
{
	unsigned length = [entity length];
	if (length == 0) return NO;

	if ([entity characterAtIndex:0] == '#') {
		unsigned base = 10;
		unsigned i    = 1;

		if (length > 1) {
			unichar second = [entity characterAtIndex:1];
			if ((second == 'x') || (second == 'X')) {
				//hexadecimal: "#x..." or "#X..."
				base = 16;
				i    = 2;
			}
		}

		//"#" or "#x" with no digits at all
		if (i >= length) return NO;

		UTF32Char value = 0;
		for (; i < length; ++i) {
			unichar  c = [entity characterAtIndex:i];
			unsigned digit;

			if ((c >= '0') && (c <= '9')) {
				digit = c - '0';
			} else if ((base == 16) && (c >= 'a') && (c <= 'f')) {
				digit = 10 + (c - 'a');
			} else if ((base == 16) && (c >= 'A') && (c <= 'F')) {
				digit = 10 + (c - 'A');
			} else {
				return NO;
			}

			//value never exceeds 0x10FFFF before this line, so the arithmetic cannot overflow 32 bits
			value = (value * base) + digit;
			if (value > 0x10FFFF) return NO;
		}

		if ((value == 0) || ((value >= 0xD800) && (value <= 0xDFFF))) return NO;

		*outCodePoint = value;
		return YES;
	}

	//named entity. for now, we only support the five essential ones.
	NSString *name = [entity lowercaseString];
	if ([name isEqualToString:@"quot"]) {
		*outCodePoint = '"';
	} else if ([name isEqualToString:@"amp"]) {
		*outCodePoint = '&';
	} else if ([name isEqualToString:@"lt"]) {
		*outCodePoint = '<';
	} else if ([name isEqualToString:@"gt"]) {
		*outCodePoint = '>';
	} else if ([name isEqualToString:@"nbsp"]) {
		//nbsp decodes to an ordinary space (U+0020)
		*outCodePoint = ' ';
	} else {
		return NO;
	}
	return YES;
}

/*!
 * @brief Replace HTML character references in the receiver with the characters they stand for
 *
 * Numeric references (decimal and hexadecimal) and the named entities quot, amp, lt, gt and nbsp are
 * decoded. Anything that is not a recognized reference - an unknown entity name, a malformed number, or a
 * bare ampersand - is copied to the result unchanged, so no text is lost. All other characters, including
 * whitespace, are preserved exactly.
 *
 * @result An autoreleased string with the recognized references decoded.
 */
- (NSString *)stringByUnescapingFromHTML
{
	//No recognized entity body is anywhere near this long; bounding the search for ';' keeps a string
	//full of bare ampersands from being rescanned to its end for every one of them.
	enum { MaxEntityLength = 32 };

	unsigned myLength = [self length];
	if (myLength == 0) return [[self copy] autorelease];

	NSMutableString *result   = [NSMutableString stringWithCapacity:myLength];
	unsigned         location = 0;

	while (location < myLength) {
		NSRange remaining      = NSMakeRange(location, myLength - location);
		NSRange ampersandRange = [self rangeOfString:@"&" options:NSLiteralSearch range:remaining];

		if (ampersandRange.location == NSNotFound) {
			//no more references; the rest of the string is literal text
			[result appendString:[self substringWithRange:remaining]];
			break;
		}

		//copy the literal text preceding the ampersand
		if (ampersandRange.location > location) {
			[result appendString:[self substringWithRange:NSMakeRange(location, ampersandRange.location - location)]];
		}

		unsigned entityStart = ampersandRange.location + 1;
		BOOL     decoded     = NO;

		if (entityStart < myLength) {
			unsigned window = myLength - entityStart;
			if (window > (MaxEntityLength + 1)) window = MaxEntityLength + 1;

			NSRange semicolonRange = [self rangeOfString:@";"
												 options:NSLiteralSearch
												   range:NSMakeRange(entityStart, window)];
			if (semicolonRange.location != NSNotFound) {
				NSString  *entity = [self substringWithRange:NSMakeRange(entityStart, semicolonRange.location - entityStart)];
				UTF32Char  codePoint = 0;

				if (AIDecodeHTMLEntity(entity, &codePoint)) {
					unichar chars[2] = { 0, 0 };
					CFIndex count    = 1;

					if (codePoint > 0xffff) {
						//split into surrogate pair
						AIGetSurrogates(codePoint, &chars[0], &chars[1]);
						count = 2;
					} else {
						chars[0] = (unichar)codePoint;
					}
					CFStringAppendCharacters((CFMutableStringRef)result, chars, count);

					location = semicolonRange.location + 1;
					decoded  = YES;
				}
			}
		}

		if (!decoded) {
			//not a reference we understand: keep the ampersand and carry on right after it
			[result appendString:@"&"];
			location = entityStart;
		}
	}

	return result;
}

/*
 * Decode file contents that could not be interpreted as UTF-8.
 *
 * ASCII is tried first. NSASCIIStringEncoding is documented as strict 7-bit, so data that was rejected
 * as UTF-8 will usually be rejected here as well; ISO Latin 1 assigns a character to every byte value
 * and is used as the last resort so that the text can still be indexed.
 *
 * Returns an autoreleased string, or nil if data is nil or no encoding applied.
 */
static NSString *AIStringByDecodingNonUTF8Data(NSData *data)
{
	if (!data) return nil;

	NSString *string = [[[NSString alloc] initWithData:data
											  encoding:NSASCIIStringEncoding] autorelease];
	if (!string) {
		string = [[[NSString alloc] initWithData:data
										encoding:NSISOLatin1StringEncoding] autorelease];
	}
	return string;
}

/*!
 * @brief Read a string from a file, assuming it to be UTF8
 *
 * If it can not be read as UTF8, it will be read as ASCII, and failing that as ISO Latin 1.
 *
 * @param path Path of the file to read.
 * @result The contents of the file, or nil if path is nil or the file could not be read. A missing file
 *         is not logged; other failures are.
 */
+ (NSString *)stringWithContentsOfUTF8File:(NSString *)path
{
	NSString	*string = nil;

	if (!path) return nil;

	if ((floor(kCFCoreFoundationVersionNumber) > kCFCoreFoundationVersionNumber10_3)) {
		NSError *error = nil;

		string = [NSString stringWithContentsOfFile:path
										   encoding:NSUTF8StringEncoding
											  error:&error];

		//The return value, not the error pointer, is what indicates failure
		if (!string) {
			BOOL	handled = NO;

			if ([[error domain] isEqualToString:NSCocoaErrorDomain]) {
				switch ([error code]) {
					case NSFileReadNoSuchFileError:
						//File not found.
						handled = YES;
						break;

					case NSFileReadInapplicableStringEncodingError:
						/* Reason: File could not be opened using text encoding Unicode (UTF-8).
						 * Description: Text encoding Unicode (UTF-8) is not applicable.
						 *
						 * We couldn't read the file as UTF8.  Try the fallback encodings.
						 */
						string = AIStringByDecodingNonUTF8Data([NSData dataWithContentsOfFile:path]);

						//If we got a string, we recovered reasonably successfully...
						handled = (string != nil);
						break;

					default:
						break;
				}
			}

			if (!handled) {
				NSLog(@"Error reading %@:\n%@; %@.",path,
					  [error localizedDescription], [error localizedFailureReason]);
			}
		}

	} else {
		NSData	*data = [NSData dataWithContentsOfFile:path];

		if (data) {
			string = [[[NSString alloc] initWithData:data
											encoding:NSUTF8StringEncoding] autorelease];
			if (!string) {
				string = AIStringByDecodingNonUTF8Data(data);
			}

			if (!string) {
				NSLog(@"Error reading %@",path);
			}
		} else {
			//File not found
			string = nil;
		}
	}

	return string;
}

@end